home *** CD-ROM | disk | FTP | other *** search
- {$A+,B-,D-,E-,F+,G-,I+,L-,N-,O-,R-,S-,V+,X+}
- Unit Dict;
- Interface
- { DICT.PAS dictionary object and methods to create and use a superimposed
- code dictionary. Copyright Edwin T. Floyd, 1990, 1991. }
- Type
- Dictionary = Object
- DictArray : Pointer; { Pointer to dictionary bit array }
- DictCount : LongInt; { Number of key entries in this dictionary }
- DictSize : Word; { Number of bytes in dictionary bit array }
- DictBits : Byte; { Number of bits per key entry }
-
- Constructor Init(MaxKeys : Word; BitsPerKey : Byte);
- { Initialize dictionary, specify maximum keys and bits per key. }
-
- Constructor RestoreDictionary(FileName : String);
- { Restore dictionary saved on disk by SaveDictionary }
-
- { Note: Use either Init or RestoreDictionary, not both. }
-
- Destructor Done;
- { Release storage allocated to dictionary. }
-
- Function DictionarySize : Word;
- { Returns number of bytes that will be written by SaveDictionary. }
-
- Procedure SaveDictionary(FileName : String);
- { Save dictionary in a disk file. }
-
- Function InsertString(Var s : String) : Boolean;
- { Insert string in dictionary; returns TRUE if string is already there. }
-
- Function StringInDictionary(Var s : String) : Boolean;
- { Returns TRUE if string is in dictionary. }
-
- Function InsertBlock(Var Data; Len : Word) : Boolean;
- { Insert block in dictionary; returns TRUE if block is already there. }
-
- Function BlockInDictionary(Var Data; Len : Word) : Boolean;
- { Returns TRUE if block is in dictionary. }
-
- Function EstError : Real;
- { Returns estimated probability of error. }
-
- Function ActError : Real;
- { Returns actual probability of error (slow, counts bits). }
-
- End;
-
- Function DictionaryBytes(MaxKeys : LongInt; BitsPerKey : Byte) : LongInt;
- { Returns the size in bytes of the optimal dictionary bit table for the
- indicated key and bit-per-key counts. }
-
- Implementation
-
- Const
- MagicNumber = $F501205E; { Used to validate dictionary save file }
-
- Type
- SaveFileHeader = Record
- { Header for dictionary save file (all numbers are byte-reversed) }
- Magic : LongInt; { Magic number for validity test }
- BitsCount : LongInt; { Bits-per-key and entry count }
- Size : Word; { Size of dictionary bit map in bytes }
- End;
-
- Function CountBits(Var InBuf; InLen : Word) : LongInt;
- External; {FAR}
-
- Function SetBloomFilter(Var InBuf; InLen : Word;
- Var BitTable; BitTableLen, BitCount : Word) : Boolean;
- External; {FAR}
-
- Function TestBloomFilter(Var InBuf; InLen : Word;
- Var BitTable; BitTableLen, BitCount : Word) : Boolean;
- External; {FAR}
-
- {$L BLOOM.OBJ }
-
- Function LongSwap(n : LongInt) : LongInt;
- { Reverse bytes in a LongInt. }
- Inline(
- $5A/ { pop dx}
- $58/ { pop ax}
- $86/$C4/ { xchg ah,al}
- $86/$D6); { xchg dh,dl}
-
- Function DictionaryBytes(MaxKeys : LongInt; BitsPerKey : Byte) : LongInt;
- Begin
- DictionaryBytes := Round(MaxKeys * BitsPerKey / (-Ln(0.5) * 8));
- End;
-
- Constructor Dictionary.Init(MaxKeys : Word; BitsPerKey : Byte);
- Var
- DictBytes : LongInt;
- Begin
- DictBytes := DictionaryBytes(MaxKeys, BitsPerKey);
- If DictBytes > $FFF0 Then Begin
- WriteLn(DictBytes, ' bytes optimal for dictionary, but ', $FFF0,
- ' is maximum size dictionary. Using max size.');
- DictBytes := $FFF0;
- End Else If DictBytes > MaxAvail Then Begin
- WriteLn(DictBytes, ' bytes optimal for dictionary, but only ', MaxAvail,
- ' bytes are available. Using ', MaxAvail);
- DictBytes := MaxAvail;
- End Else If DictBytes < 16 Then DictBytes := 16;
- DictSize := DictBytes;
- GetMem(DictArray, DictSize);
- FillChar(DictArray^, DictSize, 0);
- DictCount := 0;
- DictBits := BitsPerKey;
- End;
-
- Constructor Dictionary.RestoreDictionary(FileName : String);
- Var
- Header : SaveFileHeader;
- DictBytes : LongInt;
- f : File;
- OldMode : Byte;
- Begin
- OldMode := FileMode;
- FileMode := $40;
- Assign(f, FileName);
- Reset(f, 1);
- BlockRead(f, Header, SizeOf(Header));
- With Header Do Begin
- Magic := LongSwap(Magic);
- Size := Swap(Size);
- DictBytes := FileSize(f) - SizeOf(Header);
- If (Magic <> MagicNumber) Or (Size <> DictBytes) Or (Size < 16)
- Or (Size > $FFF0) Then Begin
- WriteLn('File ', FileName, ' is not a dictionary save file.');
- Halt(1);
- End;
- DictSize := Size;
- DictBits := BitsCount And $FF;
- DictCount := LongSwap(BitsCount And $FFFFFF00);
- GetMem(DictArray, DictSize);
- BlockRead(f, DictArray^, DictSize);
- Close(f);
- FileMode := OldMode;
- End;
- End;
-
- Destructor Dictionary.Done;
- Begin
- FreeMem(DictArray, DictSize);
- DictArray := Nil;
- DictSize := 0;
- DictBits := 0;
- DictCount := 0;
- End;
-
- Function Dictionary.DictionarySize : Word;
- Begin
- DictionarySize := DictSize + SizeOf(SaveFileHeader);
- End;
-
- Function Dictionary.InsertString(Var s : String) : Boolean;
- Begin
- InsertString := InsertBlock(s[1], Length(s));
- End;
-
- Function Dictionary.StringInDictionary(Var s : String) : Boolean;
- Begin
- StringInDictionary := BlockInDictionary(s[1], Length(s));
- End;
-
- Function Dictionary.InsertBlock(Var Data; Len : Word) : Boolean;
- Begin
- If SetBloomFilter(Data, Len, DictArray^, DictSize, DictBits)
- Then InsertBlock := True Else Begin
- Inc(DictCount);
- InsertBlock := False;
- End;
- End;
-
- Function Dictionary.BlockInDictionary(Var Data; Len : Word) : Boolean;
- Begin
- BlockInDictionary := TestBloomFilter(Data, Len, DictArray^, DictSize,
- DictBits);
- End;
-
- Procedure Dictionary.SaveDictionary(FileName : String);
- Var
- Header : SaveFileHeader;
- f : File;
- Begin
- Assign(f, FileName);
- ReWrite(f, 1);
- With Header Do Begin
- Magic := LongSwap(MagicNumber);
- Size := Swap(DictSize);
- BitsCount := LongSwap(DictCount) + DictBits;
- End;
- BlockWrite(f, Header, SizeOf(Header));
- BlockWrite(f, DictArray^, DictSize);
- Close(f);
- End;
-
- Function Dictionary.EstError : Real;
- Begin
- EstError := Exp(Ln(1.0-Exp(-(DictCount*DictBits)/(DictSize*8.0)))*DictBits);
- End;
-
- Function Dictionary.ActError : Real;
- Var
- AllBits, BitsOn : LongInt;
- Begin
- AllBits := LongInt(DictSize) * 8;
- BitsOn := CountBits(DictArray^, DictSize);
- ActError := Exp(Ln(BitsOn / AllBits) * DictBits);
- End;
-
- End.
-